#############################################
#############################################
### REPLICATION MATERIAL                  ###  
### Gender quotas and political Knowledge ###
### Create main dataset                   ###
#############################################
#############################################

# Load libraries
library(tidyverse)
library(haven)
library(naniar)
library(labelled)
library(rio)
library(stargazer)


#######################
## LOAD RAW DATASET  ##
#######################

######################################################################################
### NOTE: The raw datasets can be downloaded from the Eurobarometer Data Services  ###
### hosted by the GESIS institute, here                                            ###
### https://www.gesis.org/en/eurobarometer-data-service#c122009                    ###
### The data is free of charge for academic researchers                            ###
### The raw names below are the original names from the Eurobarometer data service ###
### The supplementary appendix contains further details on the waves used          ###
######################################################################################

# Read each raw Eurobarometer Stata file into its own object, one per
# survey year. The ZA numbers are the file names used in "data/raw".
eb_1992_raw <- read_dta(file = "data/raw/ZA2141_v1-1-0.dta")
eb_1993_raw <- read_dta(file = "data/raw/ZA2346_v1-1-0.dta")
eb_1994_raw <- read_dta(file = "data/raw/ZA2491_v1-1-0.dta")
eb_1996_raw <- read_dta(file = "data/raw/ZA2828_v1-0-1.dta")
eb_1998_raw <- read_dta(file = "data/raw/ZA3085_v1-1-0.dta")
eb_1999_raw <- read_dta(file = "data/raw/ZA3204_v1-0-1.dta")
eb_2002_raw <- read_dta(file = "data/raw/ZA3693_v1-0-1.dta")
eb_2003_raw <- read_dta(file = "data/raw/ZA4056_v1-0-1.dta")
eb_2004_raw <- read_dta(file = "data/raw/ZA4229_v1-1-0.dta")
eb_2005_raw <- read_dta(file = "data/raw/ZA4414_v1-1-0.dta")
eb_2006_raw <- read_dta(file = "data/raw/ZA4526_v1-0-1.dta")
eb_2007_raw <- read_dta(file = "data/raw/ZA4565_v4-0-1.dta")
eb_2008_raw <- read_dta(file = "data/raw/ZA4819_v3-0-2.dta")
eb_2009_raw <- read_dta(file = "data/raw/ZA4994_v3-0-0.dta")
eb_2010_raw <- read_dta(file = "data/raw/ZA5449_v2-2-0.dta")
eb_2011_raw <- read_dta(file = "data/raw/ZA5567_v2-0-1.dta")
eb_2012_raw <- read_dta(file = "data/raw/ZA5685_v2-0-0.dta")
eb_2013_raw <- read_dta(file = "data/raw/ZA5876_v2-0-0.dta")
eb_2014_raw <- read_dta(file = "data/raw/ZA5928_v3-0-0.dta")
eb_2015_raw <- read_dta(file = "data/raw/ZA6643_v4-0-0.dta")
eb_2016_raw <- read_dta(file = "data/raw/ZA6788_v1-3-0.dta")
eb_2017_raw <- read_dta(file = "data/raw/ZA6928_v1-0-0.dta")
eb_2018_raw <- read_dta(file = "data/raw/ZA7489_v1-0-0.dta")

###############################################
## Reduce raw datasets to relevant variables ##
###############################################

# Keep only the columns each wave needs downstream. Column names differ by
# wave: up to 2010 the raw files use positional names (v3, v4, ...), from
# 2011 onwards they use mnemonic names (survey, uniqid, country, ...).
# The order of the selected columns is preserved in the output.
eb_1992_small_raw <- eb_1992_raw %>%
  select(v3, v4, v5, v615, v616, v611, v245:v254, v602, v18)
eb_1993_small_raw <- eb_1993_raw %>%
  select(v3, v4, v5, v686, v687, v683, v292:v295, v672, v20)
eb_1994_small_raw <- eb_1994_raw %>%
  select(v3, v4, v5, v325, v326, v323, v64:v67, v286)
eb_1996_small_raw <- eb_1996_raw %>%
  select(v4, v5, v6, v553, v554, v551, v432:v441, v545, v37)
eb_1998_small_raw <- eb_1998_raw %>%
  select(v4, v5, v6, v366, v367, v364, v144:v150, v362)
eb_1999_small_raw <- eb_1999_raw %>%
  select(v4, v5, v6, v803, v804, v801, v612:v615, v782)
eb_2002_small_raw <- eb_2002_raw %>%
  select(v4, v5, v6, v419, v420, v417, v296:v300, v414, v40)
eb_2003_small_raw <- eb_2003_raw %>%
  select(v4, v5, v6, v330, v331, v328, v151:v160, v308)
eb_2004_small_raw <- eb_2004_raw %>%
  select(v4, v5, v6, v428, v429, v426, v211:v216, v423, v69)
eb_2005_small_raw <- eb_2005_raw %>%
  select(v4, v5, v6, v439, v440, v437, v221:v224, v434, v75)
eb_2006_small_raw <- eb_2006_raw %>%
  select(v4, v5, v6, v462, v463, v460, v143:v145, v457, v75)
eb_2007_small_raw <- eb_2007_raw %>%
  select(v4, v5, v6, v420, v421, v418, v290:v292, v415, v87)
eb_2008_small_raw <- eb_2008_raw %>%
  select(v4, v5, v6, v670, v671, v668, v284:v287, v665, v88)
eb_2009_small_raw <- eb_2009_raw %>%
  select(v4, v5, v6, v584, v585, v582, v266:v268, v578, v84)
eb_2010_small_raw <- eb_2010_raw %>%
  select(v4, v5, v6, v602, v603, v600, v335:v337, v595, v91)
eb_2011_small_raw <- eb_2011_raw %>%
  select(survey, uniqid, country, d10, d11, d8, qa15_1, qa15_3, qa1)
eb_2012_small_raw <- eb_2012_raw %>%
  select(survey, uniqid, country, d10, d11, d8, qa17_1:qa17_3, qa1)
eb_2013_small_raw <- eb_2013_raw %>%
  select(survey, uniqid, country, d10, d11, d8, qa16_1:qa16_3, qa1)
eb_2014_small_raw <- eb_2014_raw %>%
  select(survey, uniqid, country, d10, d11, d8, qa16_1, qa16_2, qa16_3,
         d1r1, qa1)
eb_2015_small_raw <- eb_2015_raw %>%
  select(survey, uniqid, country, d10, d11, d8, qa14_1:qa14_3, d1r1, d70)
eb_2016_small_raw <- eb_2016_raw %>%
  select(survey, uniqid, country, d10, d11, d8, qa16_1:qa16_3, d1r1, d70)
eb_2017_small_raw <- eb_2017_raw %>%
  select(survey, uniqid, country, d10, d11, d8, qa15_1:qa15_3, d1r1, d70)
eb_2018_small_raw <- eb_2018_raw %>%
  select(survey, uniqid, country, d10, d11, d8, qa14_1:qa14_3, d1r1, d70)

#############################
## Save small raw datasets ##
#############################

# Persist the reduced per-wave extracts as Stata files.
# They are written to "data/analysis/" because that is the directory every
# wrangling section below reads them back from (and writes its .rds output
# to). Writing them to a different folder would leave those reads without
# input on a fresh run, or silently pick up stale files.
write_dta(eb_1992_small_raw, path = "data/analysis/eb_1992_small_raw.dta")
write_dta(eb_1993_small_raw, path = "data/analysis/eb_1993_small_raw.dta")
write_dta(eb_1994_small_raw, path = "data/analysis/eb_1994_small_raw.dta")
write_dta(eb_1996_small_raw, path = "data/analysis/eb_1996_small_raw.dta")
write_dta(eb_1998_small_raw, path = "data/analysis/eb_1998_small_raw.dta")
write_dta(eb_1999_small_raw, path = "data/analysis/eb_1999_small_raw.dta")
write_dta(eb_2002_small_raw, path = "data/analysis/eb_2002_small_raw.dta")
write_dta(eb_2003_small_raw, path = "data/analysis/eb_2003_small_raw.dta")
write_dta(eb_2004_small_raw, path = "data/analysis/eb_2004_small_raw.dta")
write_dta(eb_2005_small_raw, path = "data/analysis/eb_2005_small_raw.dta")
write_dta(eb_2006_small_raw, path = "data/analysis/eb_2006_small_raw.dta")
write_dta(eb_2007_small_raw, path = "data/analysis/eb_2007_small_raw.dta")
write_dta(eb_2008_small_raw, path = "data/analysis/eb_2008_small_raw.dta")
write_dta(eb_2009_small_raw, path = "data/analysis/eb_2009_small_raw.dta")
write_dta(eb_2010_small_raw, path = "data/analysis/eb_2010_small_raw.dta")
write_dta(eb_2011_small_raw, path = "data/analysis/eb_2011_small_raw.dta")
write_dta(eb_2012_small_raw, path = "data/analysis/eb_2012_small_raw.dta")
write_dta(eb_2013_small_raw, path = "data/analysis/eb_2013_small_raw.dta")
write_dta(eb_2014_small_raw, path = "data/analysis/eb_2014_small_raw.dta")
write_dta(eb_2015_small_raw, path = "data/analysis/eb_2015_small_raw.dta")
write_dta(eb_2016_small_raw, path = "data/analysis/eb_2016_small_raw.dta")
write_dta(eb_2017_small_raw, path = "data/analysis/eb_2017_small_raw.dta")
write_dta(eb_2018_small_raw, path = "data/analysis/eb_2018_small_raw.dta")

#######################
### WRANGLE EB 1992 ###
#######################

# Read the reduced 1992 extract back from disk
eb_1992_raw_df <- read_dta("data/analysis/eb_1992_small_raw.dta")

# Harmonise the 1992 wave, store it as .rds and print its structure
eb_1992_df <- eb_1992_raw_df %>%
  # drop country code 14 (rows with a missing country code are dropped too)
  filter(!(v5 == 14)) %>%
  mutate(
    v3 = "1992",
    # fold country code 13 into code 4
    v5 = ifelse(v5 == 13, 4, v5),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v3, v4)),
    # gender: code 2 becomes 0, other codes are kept
    v615 = ifelse(v615 == 2, 0, v615),
    # education: raw value minus 6; negatives are set to 1, the value 92
    # (raw 98) is replaced by age minus 6, and the result is capped at 25
    v611 = v611 - 6,
    v611 = ifelse(v611 < 0, 1, v611),
    v611 = ifelse(v611 == 92, v616 - 6, v611),
    v611 = ifelse(v611 >= 25, 25, v611),
    # knowledge items: 1 (correct) is kept; 0, 8, 9 and NA become 0
    across(
      all_of(paste0("v", 245:254)),
      function(item) ifelse(item %in% c(0, 8, 9, NA), 0, item)
    )
  ) %>%
  rename(
    year = v3,
    wave_id = v4,
    country = v5,
    gender = v615,
    age = v616,
    education = v611,
    know_q1 = v245, know_q2 = v246, know_q3 = v247, know_q4 = v248,
    know_q5 = v249, know_q6 = v250, know_q7 = v251, know_q8 = v252,
    know_q9 = v253, know_q10 = v254,
    left_right_placement = v602,
    life_satisfaction = v18
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_1992_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 1993 ###
#######################

# Read the reduced 1993 extract back from disk
eb_1993_raw_df <- read_dta("data/analysis/eb_1993_small_raw.dta")

# Harmonise the 1993 wave, store it as .rds and print its structure
eb_1993_df <- eb_1993_raw_df %>%
  # drop country codes 14 and 15 (rows with a missing code are dropped too)
  filter(!is.na(v5), !(v5 %in% c(14, 15))) %>%
  mutate(
    v3 = "1993",
    # fold country code 13 into code 4
    v5 = ifelse(v5 == 13, 4, v5),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v3, v4)),
    # gender: code 2 becomes 0, other codes are kept
    v686 = ifelse(v686 == 2, 0, v686),
    # education: raw value minus 6; negatives are set to 1, the value 92
    # (raw 98) is replaced by age minus 6, and the result is capped at 25
    v683 = v683 - 6,
    v683 = ifelse(v683 < 0, 1, v683),
    v683 = ifelse(v683 == 92, v687 - 6, v683),
    v683 = ifelse(v683 >= 25, 25, v683),
    # knowledge items: 1 (correct) is kept; 2, 3, 9 and NA become 0
    across(
      all_of(paste0("v", 292:295)),
      function(item) ifelse(item %in% c(2, 3, 9, NA), 0, item)
    )
  ) %>%
  rename(
    year = v3,
    wave_id = v4,
    country = v5,
    gender = v686,
    age = v687,
    education = v683,
    know_q1 = v292, know_q2 = v293, know_q3 = v294, know_q4 = v295,
    left_right_placement = v672,
    life_satisfaction = v20
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_1993_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 1994 ###
#######################

# Read the reduced 1994 extract back from disk
eb_1994_raw_df <- read_dta("data/analysis/eb_1994_small_raw.dta")

# Harmonise the 1994 wave, store it as .rds and print its structure.
# Unlike the neighbouring waves, no country codes are filtered out here and
# variable labels are not removed.
eb_1994_df <- eb_1994_raw_df %>%
  mutate(
    v3 = "1994",
    # fold country code 13 into code 4
    v5 = ifelse(v5 == 13, 4, v5),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v3, v4)),
    # gender: code 2 becomes 0, other codes are kept
    v325 = ifelse(v325 == 2, 0, v325),
    # education: raw value minus 6; negatives are set to 1, the value 92
    # (raw 98) is replaced by age minus 6, and the result is capped at 25
    v323 = v323 - 6,
    v323 = ifelse(v323 < 0, 1, v323),
    v323 = ifelse(v323 == 92, v326 - 6, v323),
    v323 = ifelse(v323 >= 25, 25, v323),
    # knowledge items: 1 (correct) is kept; 2, 3 and NA become 0
    across(
      all_of(paste0("v", 64:67)),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    )
  ) %>%
  rename(
    year = v3,
    wave_id = v4,
    country = v5,
    gender = v325,
    age = v326,
    education = v323,
    know_q1 = v64, know_q2 = v65, know_q3 = v66, know_q4 = v67,
    left_right_placement = v286
  ) %>%
  write_rds("data/analysis/eb_1994_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 1996 ###
#######################

# Read the reduced 1996 extract back from disk
eb_1996_raw_df <- read_dta("data/analysis/eb_1996_small_raw.dta")

# Harmonise the 1996 wave, store it as .rds and print its structure
eb_1996_df <- eb_1996_raw_df %>%
  # drop country codes 14-16 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% c(14, 15, 16))) %>%
  mutate(
    v4 = "1996",
    # fold country code 13 into code 4
    v6 = ifelse(v6 == 13, 4, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v553 = ifelse(v553 == 2, 0, v553),
    # education: raw value minus 6; a result of exactly -6 (raw 0) is
    # replaced by age minus 6, and the result is capped at 25
    v551 = v551 - 6,
    v551 = ifelse(v551 == -6, v554 - 6, v551),
    v551 = ifelse(v551 >= 25, 25, v551),
    # knowledge items: 1 (correct) is kept; 2, 3 and NA become 0
    across(
      all_of(paste0("v", 432:441)),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    )
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v553,
    age = v554,
    education = v551,
    know_q1 = v432, know_q2 = v433, know_q3 = v434, know_q4 = v435,
    know_q5 = v436, know_q6 = v437, know_q7 = v438, know_q8 = v439,
    know_q9 = v440, know_q10 = v441,
    left_right_placement = v545,
    life_satisfaction = v37
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_1996_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 1998 ###
#######################

# Read the reduced 1998 extract back from disk
eb_1998_raw_df <- read_dta("data/analysis/eb_1998_small_raw.dta")

# Harmonise the 1998 wave, store it as .rds and print its structure
eb_1998_df <- eb_1998_raw_df %>%
  # drop country codes 14-17 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% c(14, 15, 16, 17))) %>%
  mutate(
    v4 = "1998",
    # fold country code 13 into code 4
    v6 = ifelse(v6 == 13, 4, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v366 = ifelse(v366 == 2, 0, v366),
    # education: raw value minus 6; negatives are set to 1, the value 92
    # (raw 98) is replaced by age minus 6, and the result is capped at 25
    v364 = v364 - 6,
    v364 = ifelse(v364 < 0, 1, v364),
    v364 = ifelse(v364 == 92, v367 - 6, v364),
    v364 = ifelse(v364 >= 25, 25, v364),
    # knowledge items scored on code 1: 1 is kept; 2, 3 and NA become 0
    across(
      all_of(c("v144", "v145", "v146", "v148")),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    ),
    # knowledge items scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    across(
      all_of(c("v147", "v149", "v150")),
      function(item) {
        item <- ifelse(item %in% c(1, 3, NA), 0, item)
        ifelse(item %in% 2, 1, item)
      }
    )
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v366,
    age = v367,
    education = v364,
    know_q1 = v144, know_q2 = v145, know_q3 = v146, know_q4 = v147,
    know_q5 = v148, know_q6 = v149, know_q7 = v150,
    left_right_placement = v362
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_1998_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 1999 ###
#######################

# Read the reduced 1999 extract back from disk
eb_1999_raw_df <- read_dta("data/analysis/eb_1999_small_raw.dta")

# Harmonise the 1999 wave, store it as .rds and print its structure
eb_1999_df <- eb_1999_raw_df %>%
  # drop country codes 14-17 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% c(14, 15, 16, 17))) %>%
  mutate(
    v4 = "1999",
    # fold country code 13 into code 4
    v6 = ifelse(v6 == 13, 4, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v803 = ifelse(v803 == 2, 0, v803),
    # education: raw value minus 6; negatives are set to 1, the value 92
    # (raw 98) is replaced by age minus 6, and the result is capped at 25
    v801 = v801 - 6,
    v801 = ifelse(v801 < 0, 1, v801),
    v801 = ifelse(v801 == 92, v804 - 6, v801),
    v801 = ifelse(v801 >= 25, 25, v801),
    # knowledge items: 1 (correct) is kept; 2, 3 and NA become 0
    across(
      all_of(paste0("v", 612:615)),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    )
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v803,
    age = v804,
    education = v801,
    know_q1 = v612, know_q2 = v613, know_q3 = v614, know_q4 = v615,
    left_right_placement = v782
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_1999_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2002 ###
#######################

# Load the reduced 2002 extract
eb_2002_raw_df <- read_dta("data/analysis/eb_2002_small_raw.dta")

# Harmonise the 2002 wave, store it as .rds and print its structure
eb_2002_df <- eb_2002_raw_df %>%
  # drop country codes 14-17 (rows with a missing code are dropped too)
  filter(v6 != 14 & v6 != 15 & v6 != 16 & v6 != 17) %>%
  mutate(v4 = "2002") %>%
  # fold country code 13 into code 4
  mutate(v6 = ifelse(v6 == 13, 4, v6)) %>%
  # respondent ID = year followed by the within-wave ID
  mutate(ID = as.numeric(paste0(v4, v5))) %>%
  # gender: code 2 becomes 0, other codes are kept
  mutate(v419 = ifelse(v419 == 2, 0, v419)) %>%
  # education: raw value minus 6; negatives are set to 1
  mutate(v417 = v417 - 6) %>%
  mutate(v417 = ifelse(v417 < 0, 1, v417)) %>%
  # The value 92 (raw 98) is replaced by age minus 6. Age is v420 in this
  # wave; v419 is the gender column (already recoded to 0/1 above), so using
  # it here would yield -6 or -5 instead of an age-based value.
  mutate(v417 = ifelse(v417 == 92, v420 - 6, v417)) %>%
  # cap education at 25
  mutate(v417 = ifelse(v417 >= 25, 25, v417)) %>%
  # knowledge items scored on code 2 (v296, v297): 1, 3 and NA become 0,
  # then 2 becomes 1
  mutate(v296 = ifelse(v296 %in% c(1, 3, NA), 0, v296)) %>%
  mutate(v296 = ifelse(v296 == 2, 1, v296)) %>%
  mutate(v297 = ifelse(v297 %in% c(1, 3, NA), 0, v297)) %>%
  mutate(v297 = ifelse(v297 == 2, 1, v297)) %>%
  # knowledge items scored on code 1 (v298-v300): 2, 3 and NA become 0
  mutate(v298 = ifelse(v298 %in% c(2, 3, NA), 0, v298)) %>%
  mutate(v299 = ifelse(v299 %in% c(2, 3, NA), 0, v299)) %>%
  mutate(v300 = ifelse(v300 %in% c(2, 3, NA), 0, v300)) %>%
  rename(wave_id = v5,
         year = v4,
         country = v6,
         gender = v419,
         age = v420,
         education = v417,
         know_q1 = v296,
         know_q2 = v297,
         know_q3 = v298,
         know_q4 = v299,
         know_q5 = v300,
         left_right_placement = v414,
         life_satisfaction = v40) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2002_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2003 ###
#######################

# Read the reduced 2003 extract back from disk
eb_2003_raw_df <- read_dta("data/analysis/eb_2003_small_raw.dta")

# Harmonise the 2003 wave, store it as .rds and print its structure
eb_2003_df <- eb_2003_raw_df %>%
  # drop country codes 14-17 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% c(14, 15, 16, 17))) %>%
  mutate(
    v4 = "2003",
    # fold country code 13 into code 4
    v6 = ifelse(v6 == 13, 4, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v330 = ifelse(v330 == 2, 0, v330),
    # education: raw value minus 6; negatives are set to 1, the value 92
    # (raw 98) is replaced by age minus 6, and the result is capped at 25
    v328 = v328 - 6,
    v328 = ifelse(v328 < 0, 1, v328),
    v328 = ifelse(v328 == 92, v331 - 6, v328),
    v328 = ifelse(v328 >= 25, 25, v328),
    # knowledge items scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    across(
      all_of(c("v151", "v152", "v154", "v155", "v157", "v160")),
      function(item) {
        item <- ifelse(item %in% c(1, 3, NA), 0, item)
        ifelse(item %in% 2, 1, item)
      }
    ),
    # knowledge items scored on code 1: 1 is kept; 2, 3 and NA become 0
    across(
      all_of(c("v153", "v156", "v158", "v159")),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    )
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v330,
    age = v331,
    education = v328,
    know_q1 = v151, know_q2 = v152, know_q3 = v153, know_q4 = v154,
    know_q5 = v155, know_q6 = v156, know_q7 = v157, know_q8 = v158,
    know_q9 = v159, know_q10 = v160,
    left_right_placement = v308
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2003_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2004 ###
#######################

# Read the reduced 2004 extract back from disk
eb_2004_raw_df <- read_dta("data/analysis/eb_2004_small_raw.dta")

# Harmonise the 2004 wave, store it as .rds and print its structure
eb_2004_df <- eb_2004_raw_df %>%
  # drop country codes 15-33 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% 15:33)) %>%
  mutate(
    v4 = "2004",
    # fold country code 14 into code 4, then shift codes 10-13 down by one
    # (10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12)
    v6 = ifelse(v6 == 14, 4, v6),
    v6 = ifelse(v6 %in% 10:13, v6 - 1, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v428 = ifelse(v428 == 2, 0, v428),
    # education: raw value minus 6; negatives and the value 91 (raw 97) are
    # set to 0, the value 92 (raw 98) is replaced by age minus 6, and the
    # result is capped at 25
    v426 = v426 - 6,
    v426 = ifelse(v426 < 0, 0, v426),
    v426 = ifelse(v426 == 91, 0, v426),
    v426 = ifelse(v426 == 92, v429 - 6, v426),
    v426 = ifelse(v426 >= 25, 25, v426),
    # knowledge items scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    across(
      all_of(c("v211", "v213", "v216")),
      function(item) {
        item <- ifelse(item %in% c(1, 3, NA), 0, item)
        ifelse(item %in% 2, 1, item)
      }
    ),
    # knowledge items scored on code 1: 1 is kept; 2, 3 and NA become 0
    across(
      all_of(c("v212", "v214", "v215")),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    )
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v428,
    age = v429,
    education = v426,
    know_q1 = v211, know_q2 = v212, know_q3 = v213,
    know_q4 = v214, know_q5 = v215, know_q6 = v216,
    left_right_placement = v423,
    life_satisfaction = v69
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2004_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2005 ###
#######################

# Read the reduced 2005 extract back from disk
eb_2005_raw_df <- read_dta("data/analysis/eb_2005_small_raw.dta")

# Harmonise the 2005 wave, store it as .rds and print its structure
eb_2005_df <- eb_2005_raw_df %>%
  # drop country codes 15-33 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% 15:33)) %>%
  mutate(
    v4 = "2005",
    # fold country code 14 into code 4, then shift codes 10-13 down by one
    # (10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12)
    v6 = ifelse(v6 == 14, 4, v6),
    v6 = ifelse(v6 %in% 10:13, v6 - 1, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v439 = ifelse(v439 == 2, 0, v439),
    # education: raw value minus 6; negatives and the value 91 (raw 97) are
    # set to 0, the value 92 (raw 98) is replaced by age minus 6, and the
    # result is capped at 25
    v437 = v437 - 6,
    v437 = ifelse(v437 < 0, 0, v437),
    v437 = ifelse(v437 == 91, 0, v437),
    v437 = ifelse(v437 == 92, v440 - 6, v437),
    v437 = ifelse(v437 >= 25, 25, v437),
    # knowledge items scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    across(
      all_of(c("v221", "v224")),
      function(item) {
        item <- ifelse(item %in% c(1, 3, NA), 0, item)
        ifelse(item %in% 2, 1, item)
      }
    ),
    # knowledge items scored on code 1: 1 is kept; 2, 3 and NA become 0
    across(
      all_of(c("v222", "v223")),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    )
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v439,
    age = v440,
    education = v437,
    know_q1 = v221, know_q2 = v222, know_q3 = v223, know_q4 = v224,
    left_right_placement = v434,
    life_satisfaction = v75
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2005_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2006 ###
#######################

# Read the reduced 2006 extract back from disk
eb_2006_raw_df <- read_dta("data/analysis/eb_2006_small_raw.dta")

# Harmonise the 2006 wave, store it as .rds and print its structure
eb_2006_df <- eb_2006_raw_df %>%
  # drop country codes 15-33 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% 15:33)) %>%
  mutate(
    v4 = "2006",
    # fold country code 14 into code 4, then shift codes 10-13 down by one
    # (10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12)
    v6 = ifelse(v6 == 14, 4, v6),
    v6 = ifelse(v6 %in% 10:13, v6 - 1, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v462 = ifelse(v462 == 2, 0, v462),
    # education: raw value minus 6; negatives and the value 91 (raw 97) are
    # set to 0, the value 92 (raw 98) is replaced by age minus 6, and the
    # result is capped at 25
    v460 = v460 - 6,
    v460 = ifelse(v460 < 0, 0, v460),
    v460 = ifelse(v460 == 91, 0, v460),
    v460 = ifelse(v460 == 92, v463 - 6, v460),
    v460 = ifelse(v460 >= 25, 25, v460),
    # knowledge item scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    v143 = ifelse(v143 %in% c(1, 3, NA), 0, v143),
    v143 = ifelse(v143 %in% 2, 1, v143),
    # knowledge items scored on code 1: 1 is kept; 2, 3 and NA become 0
    across(
      all_of(c("v144", "v145")),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    )
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v462,
    age = v463,
    education = v460,
    know_q1 = v143, know_q2 = v144, know_q3 = v145,
    left_right_placement = v457,
    life_satisfaction = v75
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2006_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2007 ###
#######################

# Read the reduced 2007 extract back from disk
eb_2007_raw_df <- read_dta("data/analysis/eb_2007_small_raw.dta")

# Harmonise the 2007 wave, store it as .rds and print its structure
eb_2007_df <- eb_2007_raw_df %>%
  # drop country codes 15-34 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% 15:34)) %>%
  mutate(
    v4 = "2007",
    # fold country code 14 into code 4, then shift codes 10-13 down by one
    # (10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12)
    v6 = ifelse(v6 == 14, 4, v6),
    v6 = ifelse(v6 %in% 10:13, v6 - 1, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v420 = ifelse(v420 == 2, 0, v420),
    # education: raw value minus 6; negatives and the value 91 (raw 97) are
    # set to 0, the value 92 (raw 98) is replaced by age minus 6, and the
    # result is capped at 25
    v418 = v418 - 6,
    v418 = ifelse(v418 < 0, 0, v418),
    v418 = ifelse(v418 == 91, 0, v418),
    v418 = ifelse(v418 == 92, v421 - 6, v418),
    v418 = ifelse(v418 >= 25, 25, v418),
    # knowledge items scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    across(
      all_of(c("v290", "v292")),
      function(item) {
        item <- ifelse(item %in% c(1, 3, NA), 0, item)
        ifelse(item %in% 2, 1, item)
      }
    ),
    # knowledge item scored on code 1: 1 is kept; 2, 3 and NA become 0
    v291 = ifelse(v291 %in% c(2, 3, NA), 0, v291)
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v420,
    age = v421,
    education = v418,
    know_q1 = v290, know_q2 = v291, know_q3 = v292,
    left_right_placement = v415,
    life_satisfaction = v87
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2007_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2008 ###
#######################

# Read the reduced 2008 extract back from disk
eb_2008_raw_df <- read_dta("data/analysis/eb_2008_small_raw.dta")

# Harmonise the 2008 wave, store it as .rds and print its structure.
# Variable labels are not removed for this wave.
eb_2008_df <- eb_2008_raw_df %>%
  # drop country codes 15-34 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% 15:34)) %>%
  mutate(
    v4 = "2008",
    # fold country code 14 into code 4, then shift codes 10-13 down by one
    # (10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12)
    v6 = ifelse(v6 == 14, 4, v6),
    v6 = ifelse(v6 %in% 10:13, v6 - 1, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v670 = ifelse(v670 == 2, 0, v670),
    # education: raw value minus 6; negatives and the value 91 (raw 97) are
    # set to 0, the value 92 (raw 98) is replaced by age minus 6, and the
    # result is capped at 25
    v668 = v668 - 6,
    v668 = ifelse(v668 < 0, 0, v668),
    v668 = ifelse(v668 == 91, 0, v668),
    v668 = ifelse(v668 == 92, v671 - 6, v668),
    v668 = ifelse(v668 >= 25, 25, v668),
    # knowledge items scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    across(
      all_of(c("v284", "v286", "v287")),
      function(item) {
        item <- ifelse(item %in% c(1, 3, NA), 0, item)
        ifelse(item %in% 2, 1, item)
      }
    ),
    # knowledge item scored on code 1: 1 is kept; 2, 3 and NA become 0
    v285 = ifelse(v285 %in% c(2, 3, NA), 0, v285)
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v670,
    age = v671,
    education = v668,
    know_q1 = v284, know_q2 = v285, know_q3 = v286, know_q4 = v287,
    left_right_placement = v665,
    life_satisfaction = v88
  ) %>%
  write_rds("data/analysis/eb_2008_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2009 ###
#######################

# Read the reduced 2009 extract back from disk
eb_2009_raw_df <- read_dta("data/analysis/eb_2009_small_raw.dta")

# Harmonise the 2009 wave, store it as .rds and print its structure
eb_2009_df <- eb_2009_raw_df %>%
  # drop country codes 15-34 (rows with a missing code are dropped too)
  filter(!is.na(v6), !(v6 %in% 15:34)) %>%
  mutate(
    v4 = "2009",
    # fold country code 14 into code 4, then shift codes 10-13 down by one
    # (10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12)
    v6 = ifelse(v6 == 14, 4, v6),
    v6 = ifelse(v6 %in% 10:13, v6 - 1, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v584 = ifelse(v584 == 2, 0, v584),
    # education: raw value minus 6; negatives and the value 91 (raw 97) are
    # set to 0, the value 92 (raw 98) is replaced by age minus 6, and the
    # result is capped at 25
    v582 = v582 - 6,
    v582 = ifelse(v582 < 0, 0, v582),
    v582 = ifelse(v582 == 91, 0, v582),
    v582 = ifelse(v582 == 92, v585 - 6, v582),
    v582 = ifelse(v582 >= 25, 25, v582),
    # knowledge items scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    across(
      all_of(c("v266", "v268")),
      function(item) {
        item <- ifelse(item %in% c(1, 3, NA), 0, item)
        ifelse(item %in% 2, 1, item)
      }
    ),
    # knowledge item scored on code 1: 1 is kept; 2, 3 and NA become 0
    v267 = ifelse(v267 %in% c(2, 3, NA), 0, v267)
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v584,
    age = v585,
    education = v582,
    know_q1 = v266, know_q2 = v267, know_q3 = v268,
    left_right_placement = v578,
    life_satisfaction = v84
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2009_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2010 ###
#######################

# Read the reduced 2010 extract back from disk
eb_2010_raw_df <- read_dta("data/analysis/eb_2010_small_raw.dta")

# Harmonise the 2010 wave, store it as .rds and print its structure.
# Variable labels are not removed for this wave.
eb_2010_df <- eb_2010_raw_df %>%
  # drop country codes 15-34 and 43 (rows with a missing code are dropped
  # too)
  filter(!is.na(v6), !(v6 %in% c(15:34, 43))) %>%
  mutate(
    v4 = "2010",
    # fold country code 14 into code 4, then shift codes 10-13 down by one
    # (10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12)
    v6 = ifelse(v6 == 14, 4, v6),
    v6 = ifelse(v6 %in% 10:13, v6 - 1, v6),
    # respondent ID = year followed by the within-wave ID
    ID = as.numeric(paste0(v4, v5)),
    # gender: code 2 becomes 0, other codes are kept
    v602 = ifelse(v602 == 2, 0, v602),
    # education: raw value minus 6; negatives and the value 91 (raw 97) are
    # set to 0, the value 92 (raw 98) is replaced by age minus 6, and the
    # result is capped at 25
    v600 = v600 - 6,
    v600 = ifelse(v600 < 0, 0, v600),
    v600 = ifelse(v600 == 91, 0, v600),
    v600 = ifelse(v600 == 92, v603 - 6, v600),
    v600 = ifelse(v600 >= 25, 25, v600),
    # knowledge items scored on code 1: 1 is kept; 2, 3 and NA become 0
    across(
      all_of(c("v335", "v336")),
      function(item) ifelse(item %in% c(2, 3, NA), 0, item)
    ),
    # knowledge item scored on code 2: 1, 3 and NA become 0, then 2
    # becomes 1
    v337 = ifelse(v337 %in% c(1, 3, NA), 0, v337),
    v337 = ifelse(v337 %in% 2, 1, v337)
  ) %>%
  rename(
    year = v4,
    wave_id = v5,
    country = v6,
    gender = v602,
    age = v603,
    education = v600,
    know_q1 = v335, know_q2 = v336, know_q3 = v337,
    left_right_placement = v595,
    life_satisfaction = v91
  ) %>%
  write_rds("data/analysis/eb_2010_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2011 ###
#######################

# Load the reduced 2011 extract
eb_2011_raw_df <- read_dta("data/analysis/eb_2011_small_raw.dta")

# Wrangle the dataset: subset countries, harmonise codes, rename and save
eb_2011_df <- eb_2011_raw_df %>%
  # Keep rows whose country code is present and is not 15-35 or 41-44
  filter(!is.na(country), !(country %in% c(15:35, 41:44))) %>%
  mutate(
    survey = "2011",
    # Country recodes run one after another, so each step sees the result of
    # the previous one: 14 -> 4, 10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12
    country = ifelse(country == 14, 4, country),
    country = ifelse(country == 10, 9, country),
    country = ifelse(country == 11, 10, country),
    country = ifelse(country == 12, 11, country),
    country = ifelse(country == 13, 12, country),
    # Numeric identifier: the survey year string followed by uniqid
    ID = as.numeric(paste0(survey, uniqid)),
    # Gender: code 2 becomes 0, every other code is kept
    d10 = ifelse(d10 == 2, 0, d10),
    # Education: shift by 6, floor negatives at 0, map the shifted special
    # codes (91 -> 0, 92 -> age minus 6) and cap at 25.
    # NOTE(review): the 2016-2018 blocks also set shifted code 93 to NA before
    # the cap; here such a value would end up as 25 - confirm against codebook.
    d8 = d8 - 6,
    d8 = ifelse(d8 < 0, 0, d8),
    d8 = ifelse(d8 == 91, 0, d8),
    d8 = ifelse(d8 == 92, d11 - 6, d8),
    d8 = ifelse(d8 >= 25, 25, d8),
    # Knowledge items: for qa15_1 the codes 2, 3 and NA become 0 and other
    # values are kept; for qa15_3 the codes 1, 3 and NA become 0 and code 2
    # becomes 1
    qa15_1 = ifelse(qa15_1 %in% c(2, 3, NA), 0, qa15_1),
    qa15_3 = ifelse(qa15_3 %in% c(1, 3, NA), 0, qa15_3),
    qa15_3 = ifelse(qa15_3 %in% 2, 1, qa15_3)
  ) %>%
  # Only two knowledge items are renamed in this wave; `country` already
  # carries its final name
  rename(
    wave_id = uniqid,
    year = survey,
    gender = d10,
    age = d11,
    education = d8,
    know_q1 = qa15_1,
    know_q2 = qa15_3,
    life_satisfaction = qa1
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2011_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2012 ###
#######################

# Load the reduced 2012 extract
eb_2012_raw_df <- read_dta("data/analysis/eb_2012_small_raw.dta")

# Wrangle the dataset: subset countries, harmonise codes, rename and save
eb_2012_df <- eb_2012_raw_df %>%
  # Keep rows whose country code is present and is not 15-36 or 41-44
  filter(!is.na(country), !(country %in% c(15:36, 41:44))) %>%
  mutate(
    survey = "2012",
    # Country recodes run one after another, so each step sees the result of
    # the previous one: 14 -> 4, 10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12
    country = ifelse(country == 14, 4, country),
    country = ifelse(country == 10, 9, country),
    country = ifelse(country == 11, 10, country),
    country = ifelse(country == 12, 11, country),
    country = ifelse(country == 13, 12, country),
    # Numeric identifier: the survey year string followed by uniqid
    ID = as.numeric(paste0(survey, uniqid)),
    # Gender: code 2 becomes 0, every other code is kept
    d10 = ifelse(d10 == 2, 0, d10),
    # Education: shift by 6, floor negatives at 0, map the shifted special
    # codes (91 -> 0, 92 -> age minus 6) and cap at 25.
    # NOTE(review): the 2016-2018 blocks also set shifted code 93 to NA before
    # the cap; here such a value would end up as 25 - confirm against codebook.
    d8 = d8 - 6,
    d8 = ifelse(d8 < 0, 0, d8),
    d8 = ifelse(d8 == 91, 0, d8),
    d8 = ifelse(d8 == 92, d11 - 6, d8),
    d8 = ifelse(d8 >= 25, 25, d8),
    # Knowledge items: for qa17_1 and qa17_2 the codes 2, 3 and NA become 0
    # and other values are kept; for qa17_3 the codes 1, 3 and NA become 0 and
    # code 2 becomes 1
    qa17_1 = ifelse(qa17_1 %in% c(2, 3, NA), 0, qa17_1),
    qa17_2 = ifelse(qa17_2 %in% c(2, 3, NA), 0, qa17_2),
    qa17_3 = ifelse(qa17_3 %in% c(1, 3, NA), 0, qa17_3),
    qa17_3 = ifelse(qa17_3 %in% 2, 1, qa17_3)
  ) %>%
  # `country` already carries its final name, so it is not listed here
  rename(
    wave_id = uniqid,
    year = survey,
    gender = d10,
    age = d11,
    education = d8,
    know_q1 = qa17_1,
    know_q2 = qa17_2,
    know_q3 = qa17_3,
    life_satisfaction = qa1
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2012_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2013 ###
#######################

# Load the reduced 2013 extract
eb_2013_raw_df <- read_dta("data/analysis/eb_2013_small_raw.dta")

# Wrangle the dataset: subset countries, harmonise codes, rename and save
eb_2013_df <- eb_2013_raw_df %>%
  # Keep rows whose country code is present and is not 15-36 or 41-44
  filter(!is.na(country), !(country %in% c(15:36, 41:44))) %>%
  mutate(
    survey = "2013",
    # Country recodes run one after another, so each step sees the result of
    # the previous one: 14 -> 4, 10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12
    country = ifelse(country == 14, 4, country),
    country = ifelse(country == 10, 9, country),
    country = ifelse(country == 11, 10, country),
    country = ifelse(country == 12, 11, country),
    country = ifelse(country == 13, 12, country),
    # Numeric identifier: the survey year string followed by uniqid
    ID = as.numeric(paste0(survey, uniqid)),
    # Gender: code 2 becomes 0, every other code is kept
    d10 = ifelse(d10 == 2, 0, d10),
    # Education: shift by 6, floor negatives at 0, map the shifted special
    # codes (91 -> 0, 92 -> age minus 6) and cap at 25.
    # NOTE(review): the 2016-2018 blocks also set shifted code 93 to NA before
    # the cap; here such a value would end up as 25 - confirm against codebook.
    d8 = d8 - 6,
    d8 = ifelse(d8 < 0, 0, d8),
    d8 = ifelse(d8 == 91, 0, d8),
    d8 = ifelse(d8 == 92, d11 - 6, d8),
    d8 = ifelse(d8 >= 25, 25, d8),
    # Knowledge items: for qa16_1 and qa16_2 the codes 2, 3 and NA become 0
    # and other values are kept; for qa16_3 the codes 1, 3 and NA become 0 and
    # code 2 becomes 1
    qa16_1 = ifelse(qa16_1 %in% c(2, 3, NA), 0, qa16_1),
    qa16_2 = ifelse(qa16_2 %in% c(2, 3, NA), 0, qa16_2),
    qa16_3 = ifelse(qa16_3 %in% c(1, 3, NA), 0, qa16_3),
    qa16_3 = ifelse(qa16_3 %in% 2, 1, qa16_3)
  ) %>%
  # `country` already carries its final name, so it is not listed here
  rename(
    wave_id = uniqid,
    year = survey,
    gender = d10,
    age = d11,
    education = d8,
    know_q1 = qa16_1,
    know_q2 = qa16_2,
    know_q3 = qa16_3,
    life_satisfaction = qa1
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2013_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2014 ###
#######################

# Load the reduced 2014 extract
eb_2014_raw_df <- read_dta("data/analysis/eb_2014_small_raw.dta")

# Wrangle the dataset: subset countries, harmonise codes, rename and save
eb_2014_df <- eb_2014_raw_df %>%
  # Keep rows whose country code is present and is not 15-36 or 41-44
  filter(!is.na(country), !(country %in% c(15:36, 41:44))) %>%
  mutate(
    survey = "2014",
    # Country recodes run one after another, so each step sees the result of
    # the previous one: 14 -> 4, 10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12
    country = ifelse(country == 14, 4, country),
    country = ifelse(country == 10, 9, country),
    country = ifelse(country == 11, 10, country),
    country = ifelse(country == 12, 11, country),
    country = ifelse(country == 13, 12, country),
    # Numeric identifier: the survey year string followed by uniqid
    ID = as.numeric(paste0(survey, uniqid)),
    # Gender: code 2 becomes 0, every other code is kept
    d10 = ifelse(d10 == 2, 0, d10),
    # Education: shift by 6, floor negatives at 0, map the shifted special
    # codes (91 -> 0, 92 -> age minus 6) and cap at 25.
    # NOTE(review): the 2016-2018 blocks also set shifted code 93 to NA before
    # the cap; here such a value would end up as 25 - confirm against codebook.
    d8 = d8 - 6,
    d8 = ifelse(d8 < 0, 0, d8),
    d8 = ifelse(d8 == 91, 0, d8),
    d8 = ifelse(d8 == 92, d11 - 6, d8),
    d8 = ifelse(d8 >= 25, 25, d8),
    # Knowledge items: for qa16_1 and qa16_2 the codes 2, 3 and NA become 0
    # and other values are kept; for qa16_3 the codes 1, 3 and NA become 0 and
    # code 2 becomes 1
    qa16_1 = ifelse(qa16_1 %in% c(2, 3, NA), 0, qa16_1),
    qa16_2 = ifelse(qa16_2 %in% c(2, 3, NA), 0, qa16_2),
    qa16_3 = ifelse(qa16_3 %in% c(1, 3, NA), 0, qa16_3),
    qa16_3 = ifelse(qa16_3 %in% 2, 1, qa16_3)
  ) %>%
  # `country` already carries its final name, so it is not listed here
  rename(
    wave_id = uniqid,
    year = survey,
    gender = d10,
    age = d11,
    education = d8,
    know_q1 = qa16_1,
    know_q2 = qa16_2,
    know_q3 = qa16_3,
    left_right_placement = d1r1,
    life_satisfaction = qa1
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2014_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2015 ###
#######################

# Load the reduced 2015 extract
eb_2015_raw_df <- read_dta("data/analysis/eb_2015_small_raw.dta")

# Wrangle the dataset: subset countries, harmonise codes, rename and save
eb_2015_df <- eb_2015_raw_df %>%
  # Keep rows whose country code is present and is not 15-37 or 41-44
  filter(!is.na(country), !(country %in% c(15:37, 41:44))) %>%
  mutate(
    survey = "2015",
    # Country recodes run one after another, so each step sees the result of
    # the previous one: 14 -> 4, 10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12
    country = ifelse(country == 14, 4, country),
    country = ifelse(country == 10, 9, country),
    country = ifelse(country == 11, 10, country),
    country = ifelse(country == 12, 11, country),
    country = ifelse(country == 13, 12, country),
    # Numeric identifier: the survey year string followed by uniqid
    ID = as.numeric(paste0(survey, uniqid)),
    # Gender: code 2 becomes 0, every other code is kept
    d10 = ifelse(d10 == 2, 0, d10),
    # Education: shift by 6, floor negatives at 0, map the shifted special
    # codes (91 -> 0, 92 -> age minus 6) and cap at 25.
    # NOTE(review): the 2016-2018 blocks also set shifted code 93 to NA before
    # the cap; here such a value would end up as 25 - confirm against codebook.
    d8 = d8 - 6,
    d8 = ifelse(d8 < 0, 0, d8),
    d8 = ifelse(d8 == 91, 0, d8),
    d8 = ifelse(d8 == 92, d11 - 6, d8),
    d8 = ifelse(d8 >= 25, 25, d8),
    # Knowledge items: for qa14_1 and qa14_2 the codes 2, 3 and NA become 0
    # and other values are kept; for qa14_3 the codes 1, 3 and NA become 0 and
    # code 2 becomes 1
    qa14_1 = ifelse(qa14_1 %in% c(2, 3, NA), 0, qa14_1),
    qa14_2 = ifelse(qa14_2 %in% c(2, 3, NA), 0, qa14_2),
    qa14_3 = ifelse(qa14_3 %in% c(1, 3, NA), 0, qa14_3),
    qa14_3 = ifelse(qa14_3 %in% 2, 1, qa14_3)
  ) %>%
  # `country` already carries its final name, so it is not listed here
  rename(
    wave_id = uniqid,
    year = survey,
    gender = d10,
    age = d11,
    education = d8,
    know_q1 = qa14_1,
    know_q2 = qa14_2,
    know_q3 = qa14_3,
    left_right_placement = d1r1,
    life_satisfaction = d70
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2015_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2016 ###
#######################

# Load the reduced 2016 extract
eb_2016_raw_df <- read_dta("data/analysis/eb_2016_small_raw.dta")

# Wrangle the dataset: subset countries, harmonise codes, rename and save
eb_2016_df <- eb_2016_raw_df %>%
  # Keep rows whose country code is present and is not 15-37 or 41-44
  filter(!is.na(country), !(country %in% c(15:37, 41:44))) %>%
  mutate(
    survey = "2016",
    # Country recodes run one after another, so each step sees the result of
    # the previous one: 14 -> 4, 10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12
    country = ifelse(country == 14, 4, country),
    country = ifelse(country == 10, 9, country),
    country = ifelse(country == 11, 10, country),
    country = ifelse(country == 12, 11, country),
    country = ifelse(country == 13, 12, country),
    # Numeric identifier: the survey year string followed by uniqid
    ID = as.numeric(paste0(survey, uniqid)),
    # Gender: code 2 becomes 0, every other code is kept
    d10 = ifelse(d10 == 2, 0, d10),
    # Education: shift by 6, floor negatives at 0, map the shifted special
    # codes (91 -> 0, 92 -> age minus 6, 93 -> NA) and cap at 25
    d8 = d8 - 6,
    d8 = ifelse(d8 < 0, 0, d8),
    d8 = ifelse(d8 == 91, 0, d8),
    d8 = ifelse(d8 == 92, d11 - 6, d8),
    d8 = ifelse(d8 == 93, NA, d8),
    d8 = ifelse(d8 >= 25, 25, d8),
    # Knowledge items: for qa16_1 and qa16_2 the codes 2, 3 and NA become 0
    # and other values are kept; for qa16_3 the codes 1, 3 and NA become 0 and
    # code 2 becomes 1
    qa16_1 = ifelse(qa16_1 %in% c(2, 3, NA), 0, qa16_1),
    qa16_2 = ifelse(qa16_2 %in% c(2, 3, NA), 0, qa16_2),
    qa16_3 = ifelse(qa16_3 %in% c(1, 3, NA), 0, qa16_3),
    qa16_3 = ifelse(qa16_3 %in% 2, 1, qa16_3)
  ) %>%
  # `country` already carries its final name, so it is not listed here
  rename(
    wave_id = uniqid,
    year = survey,
    gender = d10,
    age = d11,
    education = d8,
    know_q1 = qa16_1,
    know_q2 = qa16_2,
    know_q3 = qa16_3,
    left_right_placement = d1r1,
    life_satisfaction = d70
  ) %>%
  remove_var_label() %>%
  write_rds("data/analysis/eb_2016_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2017 ###
#######################

# Load the reduced 2017 extract
eb_2017_raw_df <- read_dta("data/analysis/eb_2017_small_raw.dta")

# Wrangle the dataset: subset countries, harmonise codes, rename and save
eb_2017_df <- eb_2017_raw_df %>%
  # Keep rows whose country code is present and is not 15-37 or 41-44
  # (a missing code is dropped, as with the former chain of `!=` tests)
  filter(!is.na(country), !(country %in% c(15:37, 41:44))) %>%
  mutate(
    survey = "2017",
    # Country recodes run one after another, so each step sees the result of
    # the previous one: 14 -> 4, 10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12
    country = ifelse(country == 14, 4, country),
    country = ifelse(country == 10, 9, country),
    country = ifelse(country == 11, 10, country),
    country = ifelse(country == 12, 11, country),
    country = ifelse(country == 13, 12, country),
    # Numeric identifier: the survey year string followed by uniqid
    ID = as.numeric(paste0(survey, uniqid)),
    # Gender: code 2 becomes 0, every other code is kept
    d10 = ifelse(d10 == 2, 0, d10),
    # Education: shift by 6, floor negatives at 0, map the shifted special
    # codes (91 -> 0, 92 -> age minus 6, 93 -> NA) and cap at 25
    d8 = d8 - 6,
    d8 = ifelse(d8 < 0, 0, d8),
    d8 = ifelse(d8 == 91, 0, d8),
    d8 = ifelse(d8 == 92, d11 - 6, d8),
    d8 = ifelse(d8 == 93, NA, d8),
    d8 = ifelse(d8 >= 25, 25, d8),
    # Knowledge items: for qa15_1 and qa15_2 the codes 2, 3 and NA become 0
    # and other values are kept; for qa15_3 the codes 1, 3 and NA become 0 and
    # code 2 becomes 1
    qa15_1 = ifelse(qa15_1 %in% c(2, 3, NA), 0, qa15_1),
    qa15_2 = ifelse(qa15_2 %in% c(2, 3, NA), 0, qa15_2),
    qa15_3 = ifelse(qa15_3 %in% c(1, 3, NA), 0, qa15_3),
    qa15_3 = ifelse(qa15_3 %in% 2, 1, qa15_3)
  ) %>%
  # `country` already carries its final name, so it is not listed here
  rename(
    wave_id = uniqid,
    year = survey,
    gender = d10,
    age = d11,
    education = d8,
    know_q1 = qa15_1,
    know_q2 = qa15_2,
    know_q3 = qa15_3,
    left_right_placement = d1r1,
    life_satisfaction = d70
  ) %>%
  # Strip variable labels before saving, as the other wave blocks do
  remove_var_label() %>%
  write_rds("data/analysis/eb_2017_df.rds") %>%
  glimpse()

#######################
### WRANGLE EB 2018 ###
#######################

# Load the reduced 2018 extract
eb_2018_raw_df <- read_dta("data/analysis/eb_2018_small_raw.dta")

# Wrangle the dataset: subset countries, harmonise codes, rename and save
eb_2018_df <- eb_2018_raw_df %>%
  # Keep rows whose country code is present and is not 15-37 or 41-44
  # (a missing code is dropped, as with the former chain of `!=` tests)
  filter(!is.na(country), !(country %in% c(15:37, 41:44))) %>%
  mutate(
    survey = "2018",
    # Country recodes run one after another, so each step sees the result of
    # the previous one: 14 -> 4, 10 -> 9, 11 -> 10, 12 -> 11, 13 -> 12
    country = ifelse(country == 14, 4, country),
    country = ifelse(country == 10, 9, country),
    country = ifelse(country == 11, 10, country),
    country = ifelse(country == 12, 11, country),
    country = ifelse(country == 13, 12, country),
    # Numeric identifier: the survey year string followed by uniqid
    ID = as.numeric(paste0(survey, uniqid)),
    # Gender: code 2 becomes 0, every other code is kept
    d10 = ifelse(d10 == 2, 0, d10),
    # Education: shift by 6, floor negatives at 0, map the shifted special
    # codes (91 -> 0, 92 -> age minus 6, 93 -> NA) and cap at 25
    d8 = d8 - 6,
    d8 = ifelse(d8 < 0, 0, d8),
    d8 = ifelse(d8 == 91, 0, d8),
    d8 = ifelse(d8 == 92, d11 - 6, d8),
    d8 = ifelse(d8 == 93, NA, d8),
    d8 = ifelse(d8 >= 25, 25, d8),
    # Knowledge items: for qa14_1 the codes 2, 3, 9 and NA become 0; for
    # qa14_2 the codes 2, 3 and NA become 0; other values are kept. For qa14_3
    # the codes 1, 3 and NA become 0 and code 2 becomes 1.
    qa14_1 = ifelse(qa14_1 %in% c(2, 3, 9, NA), 0, qa14_1),
    qa14_2 = ifelse(qa14_2 %in% c(2, 3, NA), 0, qa14_2),
    qa14_3 = ifelse(qa14_3 %in% c(1, 3, NA), 0, qa14_3),
    qa14_3 = ifelse(qa14_3 %in% 2, 1, qa14_3)
  ) %>%
  # `country` already carries its final name, so it is not listed here
  rename(
    wave_id = uniqid,
    year = survey,
    gender = d10,
    age = d11,
    education = d8,
    know_q1 = qa14_1,
    know_q2 = qa14_2,
    know_q3 = qa14_3,
    left_right_placement = d1r1,
    life_satisfaction = d70
  ) %>%
  # Strip variable labels before saving, as the other wave blocks do
  remove_var_label() %>%
  write_rds("data/analysis/eb_2018_df.rds") %>%
  glimpse()

############################
### STACK THE DATAFRAMES ###
############################

# Row-bind the per-wave data frames in chronological order
eb_df <- list(
  eb_1992_df, eb_1993_df, eb_1994_df, eb_1996_df,
  eb_1998_df, eb_1999_df, eb_2002_df, eb_2003_df,
  eb_2004_df, eb_2005_df, eb_2006_df, eb_2007_df,
  eb_2008_df, eb_2009_df, eb_2010_df, eb_2011_df,
  eb_2012_df, eb_2013_df, eb_2014_df, eb_2015_df,
  eb_2016_df, eb_2017_df, eb_2018_df
) %>%
  bind_rows()

##############################
### Reorganize new dataframe
##############################

# Replace the per-wave identifiers with a running row index
eb_df <- mutate(eb_df, ID = seq_len(n()))

# Keep and order the analysis columns.
# NOTE(review): `know_q1:know_q10` is a positional range, so it relies on the
# column order produced by the stacking step - confirm it covers exactly the
# ten knowledge items.
eb_df <- eb_df %>%
  dplyr::select(
    ID, know_q1:know_q10,
    country, year, gender, age, education,
    left_right_placement, life_satisfaction, wave_id
  ) %>%
  glimpse()


######################## 
### Reshape dataset  ###
########################


# One row per respondent and knowledge item
eb_long_df <- eb_df %>%
  pivot_longer(
    cols = starts_with("know"),
    names_to = "know_q_num_1",
    values_to = "know_correct"
  ) %>%
  mutate(
    # Item number 1-10 looked up from the column name; any other name gives NA
    know_q_num = as.numeric(match(know_q_num_1, paste0("know_q", 1:10))),
    # The name column is no longer needed once the number is derived
    know_q_num_1 = NULL,
    year = as.numeric(year)
  ) %>%
  relocate(ID, country, year, know_q_num, know_correct, gender) %>%
  glimpse()

#############################################
#############################################
### Import and reshape country level vars ###
#############################################
#############################################

# Country-level covariates, stored wide with one column per variable and year
country_level_vars <- import("data/raw/country_level_vars.xlsx")

# Split each "<variable>_<year>" column name at the underscore: the first
# part becomes a value column, the second part the `year` column
country_level_vars_long <- pivot_longer(
  country_level_vars,
  cols = starts_with(
    c("GDPgrowth_", "pop_", "Wparl_", "quota_", "counter_")
  ),
  names_to = c(".value", "year"),
  names_sep = "_"
)

# Sort, rename, reorder and convert `year` (character after the split) to
# numeric so it can be matched with the survey data
country_level_vars_long <- country_level_vars_long %>%
  arrange(country, year) %>%
  rename(
    GDP_growth = GDPgrowth,
    women_in_parl = Wparl
  ) %>%
  relocate(country, year, name) %>%
  mutate(year = as.numeric(year)) %>%
  glimpse()

################################
### Merge the two dataframes ###
################################

# Attach the country-year covariates to every respondent-item row
analysis <- eb_long_df %>%
  left_join(country_level_vars_long, by = c("country", "year"))

# Keep rows with an observed answer and education value, floor education at
# zero, and save the result
analysis.2 <- analysis %>%
  filter(!is.na(know_correct), !is.na(education)) %>%
  mutate(education = ifelse(education < 0, 0, education)) %>%
  write_rds("data/analysis/analysis_2_df.rds") %>%
  glimpse()

########################################
########################################
### Table A1: Descriptive Statistics ###
########################################

# Load the datasets
# NOTE(review): these paths differ from the file written earlier in this
# script ("data/analysis/analysis_2_df.rds"), and nothing visible here creates
# "analysis.2.binomial.rds" - confirm which script produces these files.
analysis.2 <- readRDS("data/analysis_2/analysis.2.rds")

# stargazer is given a plain data.frame
analysis.2.binomial <- as.data.frame(
  readRDS("data/analysis_2/analysis.2.binomial.rds")
)

# Descriptive statistics, written as a LaTeX table
stargazer(
  analysis.2.binomial,
  type = "latex",
  title = "Descriptive Statistics",
  digits = 1,
  out = "draft/table1_appendix.tex"
)






